Statistical data analysis pt. 1

# Reading the pre-prepared dataset

all <- read_excel("che_covid19.xlsx")[,2:40] %>% relocate(mortality, .before = `Country Code`)
all
# There are some extra variables, but we will not use all of them

to_use <- c("mortality", "che", "beds", "pop65", "popdens", "urban", "dphe", "dghe", "tobacco", "procur", "doctors", "nurses", "beh_stayhome", "beh_socgathering", "beh_distance", "beh_tellsymp", "beh_handwash", "fob_social", "fob_handshake", "fob_stores", "fob_curfew", "perceivedreaction_d", "govtrust_d", "govfact_d", "perceivedeffectiveness_d", "region", "population")

# Selecting columns we plan to use

data <- subset(all, select=to_use)
data
# Alleviating the problems in the next function caused by "_" in column names
colnames(data) <- gsub("_", ".", colnames(data))

# Calculating descriptive statistics
number <- function(x, na.rm = TRUE){return(sum(!is.na(x)))}

stats <- data %>%
  summarise(across(where(is.numeric), 
                   list(mean = mean, median = median, sd = sd, min = min, max = max, Q1=~quantile(., probs = 0.25), Q3=~quantile(., probs = 0.75)), 
                   na.rm = TRUE)) %>% 
  pivot_longer(everything(), names_to = "name", values_to = "value") %>% 
  separate(name, c("variable", "statistic"), sep = "_") %>%
  pivot_wider(names_from = statistic, values_from = value) %>%
  arrange(variable) %>% 
  select(variable, mean, sd, min, Q1, median, Q3, max)

# Changing column names back
colnames(data) <- gsub("\\.", "_", colnames(data))
data

# Changing variable names to match
stats$variable <- gsub("\\.", "_", stats$variable)

options(scipen=999) # Disabling scientific notation (e.g., e+01)

# Putting variables in the original order
stats = na.omit(stats[match(to_use, stats$variable),])
stats
round_df <- function(x, digits) {
    # round all numeric variables
    # x: data frame 
    # digits: number of digits to round
    numeric_columns <- sapply(x, mode) == 'numeric'
    x[numeric_columns] <-  round(x[numeric_columns], digits)
    x
}

# Rounding the numbers
round_df(stats, 3)
variable mean sd min Q1 median Q3 max
mortality 134.616 106.706 0.390 50.638 125.660 204.732 605.680
che 7.623 2.580 2.495 5.689 7.540 9.260 16.885
beds 3.871 2.506 0.630 2.203 3.120 5.128 13.050
pop65 14.778 5.963 1.523 9.130 15.593 19.762 28.002
popdens 296.441 1043.790 3.576 34.626 99.851 218.329 8044.526
urban 75.385 16.109 18.585 66.545 79.732 87.029 100.000
dphe 35.833 14.427 13.667 25.914 34.440 48.149 70.604
dghe 63.963 14.562 28.732 51.746 64.810 74.086 85.323
tobacco 24.247 8.766 7.900 18.325 23.500 28.875 44.700
procur 0.517 0.504 0.000 0.000 1.000 1.000 1.000
doctors 33.812 16.049 4.650 23.620 32.370 43.598 80.130
nurses 69.248 50.026 2.800 26.465 61.645 102.375 216.700
beh_stayhome 83.462 7.333 58.678 81.242 84.828 87.905 94.411
beh_socgathering 92.326 4.991 75.297 90.952 94.353 95.561 99.000
beh_distance 78.763 9.116 47.866 74.073 81.322 85.271 90.561
beh_tellsymp 92.846 4.281 78.447 92.823 94.256 95.094 97.724
beh_handwash 91.686 2.830 83.720 90.474 92.055 93.745 96.566
fob_social 0.976 0.038 0.786 0.976 0.989 0.994 1.000
fob_handshake 0.967 0.037 0.745 0.959 0.977 0.986 1.000
fob_stores 0.806 0.167 0.197 0.768 0.870 0.910 0.971
fob_curfew 0.714 0.194 0.159 0.592 0.741 0.875 0.990
perceivedreaction_d 0.403 0.233 0.000 0.232 0.357 0.565 0.908
govtrust_d 0.575 0.245 0.090 0.376 0.584 0.799 0.957
govfact_d 0.633 0.239 0.092 0.491 0.709 0.823 0.979
perceivedeffectiveness_d 0.888 0.054 0.696 0.854 0.900 0.926 0.966
population 64586735.900 187687848.304 360563.000 5427584.250 10730269.500 47935001.500 1397715000.000
# Computing the correlation matrix for the numeric variables (all except region)
CorMatrix = cor(data[, !names(data) %in% c("region")] , use = "complete.obs")

cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

# Matrix of p-values (H0: correlation = 0)
p.mat <- cor.mtest(data[, !names(data) %in% c("region")])

CorMatrix<-round(CorMatrix,2)

col <- colorRampPalette(c("deeppink", "hotpink", "lightpink", "floralwhite", "darkseagreen1", "darkslategray2", "dodgerblue4"))

#png(file="corr.png", res=300, width=4500, height=4500)
corrplot(CorMatrix, method="color", col=col(200),  
         type="upper", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=45, # Text label color and rotation
         # Combine with significance
         p.mat = p.mat, sig.level = c(0.01, 0.05, 0.1), insig ="label_sig",
         # Hide correlation coefficient on the principal diagonal
         diag=FALSE , number.cex=0.9, tl.cex=1, pch.cex=1)

Correlogram

# Exploring region

data %>% ggplot(aes(y = mortality, x = region)) + 
  geom_boxplot()

# Drawing a map
theme_set(theme_bw())

thismap = map_data("world")

all$`Country Name` <- recode(all$`Country Name`, "'Egypt, Arab Rep.' = 'Egypt'; 'United Kingdom' = 'UK'; 'Korea, Rep.' = 'South Korea'; 'Russian Federation' = 'Russia'; 'Slovak Republic' = 'Slovakia'; 'United States' = 'USA'")

# Setting colors
thismap <- mutate(thismap, fill = ifelse(region %in% all$`Country Name`[all$region == 'Americas'], "#FF7F11", ifelse(region %in% all$`Country Name`[all$region == 'Europe'], "#1446A0", ifelse(region %in% all$`Country Name`[all$region == 'Western Pacific'], "#DB3069", ifelse(region %in% all$`Country Name`[all$region == 'South-East Asia'], "#00AF54", ifelse(region %in% all$`Country Name`[all$region == 'Eastern Mediterranean'], "#F5D547", "white"))))))

# Using scale_fill_identity to set correct colors
#png(file="map.png", res=300, width=4500, height=3000)
ggplot(thismap, aes(long, lat, fill = fill, group=group)) + 
  geom_polygon(colour="gray") + 
  scale_fill_identity("WHO Region", guide = "legend", labels = c("South-East Asia", "Europe", "Western Pacific", "Eastern Mediterranean", "Americas", "unavailable")) +
theme(legend.position = "bottom", legend.key.size = unit(1,"cm"),   legend.title=element_text(size=30), 
    legend.text=element_text(size=25))

World Map

formattable(data %>% group_by(region) %>%
     summarise(no_rows = length(region)))
region no_rows
Americas 12
Eastern Mediterranean 7
Europe 33
South-East Asia 2
Western Pacific 6
# There is too few instances of the three regions, so it makes sense to make an "other" category.

data = data %>% mutate(region = case_when(data$region=="Americas" ~ "Americas", data$region=="Europe" ~ "Europe", TRUE ~ "Other"))
# Exploring procur
data %>% ggplot(aes(y = mortality, x =  as.factor(procur))) + 
  geom_boxplot() + scale_x_discrete(name = "procur")

# Drawing scatterplots of everything with mortality
#png(file="scatter.png", res=300, width=6000, height=8000)
data %>% dplyr::select(c(1:25, 27)) %>% 
  gather(-mortality, key = "var", value = "value") %>%
  ggplot(aes(x = value, y = mortality)) +
    geom_point() +
    facet_wrap(~ var, ncol=3, scales = "free", shrink=TRUE) +
    theme_bw() + 
    theme(axis.text = element_text(size = 14),
          axis.title = element_text( size = 16, face = "bold" ),
          legend.position="none",
          strip.text = element_text(size = 20))

Scatterplots

Statistical data analysis pt. 2

# Doing the same things again with the newly rebuilt dataset
allnew <- read_excel("che_covid19-new.xlsx")[,2:37] %>% relocate(mortality, .before = `Country Code`)
# Now when we select countries with no less than 20 respondents in the Global Behaviors and Perceptions survey, there are 96 complete observations
allnew <- filter(allnew, n >= 20)
allnew
# There are some extra variables, but we will not use all of them

to_use1 <- c("mortality", "che", "pop65", "popdens", "urban", "dphe", "dghe", "doctors", "nurses", "beh_stayhome", "beh_socgathering", "beh_distance", "beh_tellsymp", "beh_handwash", "fob_social", "fob_handshake", "fob_stores", "fob_curfew", "perceivedreaction_d", "govtrust_d", "govfact_d", "perceivedeffectiveness_d", "region", "population")

# Selecting columns we plan to use

datanew <- subset(allnew, select=to_use1)
datanew
# Alleviating the problems in the next function caused by "_" in column names
colnames(datanew) <- gsub("_", ".", colnames(datanew))

# Calculating descriptive statistics
number <- function(x, na.rm = TRUE){return(sum(!is.na(x)))}

statsnew <- datanew %>%
  summarise(across(where(is.numeric), 
                   list(mean = mean, median = median, sd = sd, min = min, max = max, Q1=~quantile(., probs = 0.25), Q3=~quantile(., probs = 0.75)), 
                   na.rm = TRUE)) %>% 
  pivot_longer(everything(), names_to = "name", values_to = "value") %>% 
  separate(name, c("variable", "statistic"), sep = "_") %>%
  pivot_wider(names_from = statistic, values_from = value) %>%
  arrange(variable) %>% 
  select(variable, mean, sd, min, Q1, median, Q3, max)

# Changing column names back
colnames(datanew) <- gsub("\\.", "_", colnames(datanew))
datanew

# Changing variable names to match
statsnew$variable <- gsub("\\.", "_", statsnew$variable)

options(scipen=999) # Disabling scientific notation (e.g., e+01)

# Putting variables in the original order
statsnew = na.omit(statsnew[match(to_use1, statsnew$variable),])
statsnew
round_df <- function(x, digits) {
    # round all numeric variables
    # x: data frame 
    # digits: number of digits to round
    numeric_columns <- sapply(x, mode) == 'numeric'
    x[numeric_columns] <-  round(x[numeric_columns], digits)
    x
}

# Rounding the numbers
round_df(statsnew, 3)
variable mean sd min Q1 median Q3 max
mortality 118.112 101.068 0.390 30.935 105.145 184.107 605.680
che 7.005 2.522 2.343 5.280 6.878 8.663 16.885
pop65 12.195 6.668 1.157 6.429 12.111 18.753 28.002
popdens 268.424 856.981 3.298 46.360 100.047 220.683 8044.526
urban 68.466 20.491 17.313 56.996 71.190 83.755 100.000
dphe 39.795 16.242 11.957 26.599 39.478 49.723 77.270
dghe 57.860 18.333 14.867 45.247 59.586 73.157 88.043
doctors 27.952 17.935 0.600 12.657 26.290 40.515 80.130
nurses 58.149 46.944 2.800 19.030 51.520 74.270 216.700
beh_stayhome 82.679 8.712 48.359 79.057 84.243 88.498 95.720
beh_socgathering 91.259 5.792 70.304 88.162 93.192 95.545 99.300
beh_distance 76.325 9.914 47.866 69.791 77.600 84.310 92.067
beh_tellsymp 92.308 4.673 78.447 90.600 93.771 95.013 99.289
beh_handwash 91.494 3.286 80.600 90.182 91.975 93.745 97.921
fob_social 0.978 0.033 0.786 0.977 0.989 0.995 1.000
fob_handshake 0.965 0.045 0.661 0.956 0.975 0.985 1.000
fob_stores 0.814 0.144 0.197 0.775 0.858 0.901 0.971
fob_curfew 0.747 0.179 0.159 0.645 0.776 0.892 1.000
perceivedreaction_d 0.403 0.245 0.000 0.215 0.368 0.565 0.952
govtrust_d 0.540 0.251 0.040 0.370 0.524 0.771 0.957
govfact_d 0.600 0.244 0.092 0.400 0.653 0.803 0.979
perceivedeffectiveness_d 0.873 0.070 0.619 0.837 0.890 0.922 1.000
population 69540265.083 201817550.622 360563.000 5658078.250 12160829.500 53931840.500 1397715000.000
# Computing the correlation matrix for the numeric variables (all except region)
CorMatrix1 = cor(datanew[, !names(datanew) %in% c("region")] , use = "complete.obs")

cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

# Matrix of p-values (H0: correlation = 0)
p.mat1 <- cor.mtest(datanew[, !names(datanew) %in% c("region")])

CorMatrix1<-round(CorMatrix1,2)

col <- colorRampPalette(c("deeppink", "hotpink", "lightpink", "floralwhite", "darkseagreen1", "darkslategray2", "dodgerblue4"))

#png(file="newcorr.png", res=300, width=4500, height=4500)
corrplot(CorMatrix1, method="color", col=col(200),  
         type="upper", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=45, # Text label color and rotation
         # Combine with significance
         p.mat = p.mat1, sig.level = c(0.01, 0.05, 0.1), insig ="label_sig",
         # Hide correlation coefficient on the principal diagonal
         diag=FALSE , number.cex=0.9, tl.cex=1, pch.cex=1)
# Заново каждый раз, когда library(corrplot)
# trace(corrplot, edit=TRUE)
# заменить на 443 строке
# place_points = function(sig.locs, point) {
  #text(pos.pNew[, 1][sig.locs], pos.pNew[, 2][sig.locs], 
      # labels = point, col = pch.col, cex = pch.cex, 
       #lwd = 2)
# НА
#place_points = function(sig.locs, point) {
      #text(pos.pNew[, 1][sig.locs], (pos.pNew[, 2][sig.locs])+0.25, 
           #labels = point, col = pch.col, cex = pch.cex, 
           #lwd = 2)

New Correlogram

formattable(datanew %>% group_by(region) %>%
     summarise(no_rows = length(region)))
region no_rows
Africa 7
Americas 18
Eastern Mediterranean 13
Europe 43
South-East Asia 6
Western Pacific 9
# Exploring region

datanew %>% ggplot(aes(y = mortality, x = region)) + 
  geom_boxplot() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

# Drawing scatterplots of everything with mortality
#png(file="newscatter.png", res=300, width=6000, height=8000)
datanew %>% dplyr::select(c(1:22, 24)) %>% 
  gather(-mortality, key = "var", value = "value") %>%
  ggplot(aes(x = value, y = mortality)) +
    geom_point() +
    facet_wrap(~ var, ncol=3, scales = "free", shrink=TRUE) +
    theme_bw() + 
    theme(axis.text = element_text(size = 14),
          axis.title = element_text( size = 16, face = "bold" ),
          legend.position="none",
          strip.text = element_text(size = 20))

New Scatterplots

# Drawing a map
theme_set(theme_bw())

thismap1 = map_data("world")

allnew

allnew$`Country Name` <- recode(allnew$`Country Name`, "'Egypt, Arab Rep.' = 'Egypt'; 'United Kingdom' = 'UK'; 'Korea, Rep.' = 'South Korea'; 'Russian Federation' = 'Russia'; 'Slovak Republic' = 'Slovakia'; 'United States' = 'USA'; 'Iran, Islamic Rep.' = 'Iran'")

# Setting colors
thismap1 <- mutate(thismap1, fill = ifelse(region %in% allnew$`Country Name`[allnew$region == 'Americas'], "#FF7F11", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Europe'], "#1446A0", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Western Pacific'], "#DB3069", ifelse(region %in% allnew$`Country Name`[allnew$region == 'South-East Asia'], "#00AF54", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Eastern Mediterranean'], "#F5D547", ifelse(region %in% allnew$`Country Name`[allnew$region == 'Africa'], "magenta", "white")))))))

# Using scale_fill_identity to set correct colors
#png(file="newmap (1).png", res=300, width=4500, height=3000)
ggplot(thismap1, aes(long, lat, fill = fill, group=group)) + 
  geom_polygon(colour="gray") + 
  scale_fill_identity("WHO Region", guide = "legend", labels = c("South-East Asia", "Europe", "Western Pacific", "Eastern Mediterranean", "Americas", "Africa", "u/a")) +
theme(legend.position = "bottom", legend.key.size = unit(1,"cm"),   legend.title=element_text(size=30), 
    legend.text=element_text(size=25))

New World Map

# Drawing a scatterplot over a limited range of density for better visibility
datanew %>% 
  ggplot(aes(popdens, mortality)) +
  geom_jitter(width = 0.25, alpha = 0.5) + xlim(0, 1000)

# Drawing a scatterplot over a limited range of population for better visibility
datanew %>% 
  ggplot(aes(population, mortality)) +
  geom_jitter(width = 0.25, alpha = 0.5) +  
  scale_x_continuous(labels = unit_format(unit = "M", scale = 1e-6), limits = c(0, 200000000), name = "population (mln.)")

Multiple regression analysis

allnew <- read_excel("che_covid19-new.xlsx")[,2:37] %>%
relocate(mortality, .before = `Country Code`)
# Now when we select countries with no less than 20 respondents in the Global Behaviors and Perceptions survey, there are 96 complete observations
allnew <- filter(allnew, n >= 20)
allnew

# Adding a new variable income level
incomelvl <- read_excel("incomelvl.xlsx")
incomelvl

allnew <- merge(x = allnew, y = incomelvl, by = "Country Code", all.x = TRUE) %>%
relocate(mortality, .before = `Country Code`)
allnew
# Exploring incomelvl
theme_set(theme_bw())

formattable(datanew %>% group_by(incomelvl) %>%
     summarise(no_rows = length(incomelvl)))
incomelvl no_rows
LIC 4
LMC 18
UMC 28
HIC 46
datanew %>% ggplot(aes(y = mortality, x = incomelvl)) + 
  geom_boxplot() #+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

datanew %>% ggplot(aes(y = che, x = incomelvl)) + 
  geom_boxplot()

reg_ols_1 <- lm(mortality ~ che, data = datanew) 

cov_ols_1 <- vcovHC(reg_ols_1, type = "HC0")
se_ols_1 <- sqrt(diag(cov_ols_1))

coeftest(reg_ols_1, df = Inf, vcov = cov_ols_1)

z test of coefficients:

            Estimate Std. Error z value   Pr(>|z|)    
(Intercept)  33.6226    23.7954  1.4130     0.1577    
che          12.0605     2.9917  4.0313 0.00005547 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_2 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens, data = datanew) 

cov_ols_2 <- vcovHC(reg_ols_2, type = "HC0")
se_ols_2 <- sqrt(diag(cov_ols_2))

coeftest(reg_ols_2, df = Inf, vcov = cov_ols_2)

z test of coefficients:

              Estimate Std. Error z value Pr(>|z|)  
(Intercept) -7.7990683 28.8566442 -0.2703  0.78695  
che          6.4684294  4.3189153  1.4977  0.13421  
pop65        2.9194319  2.5253820  1.1560  0.24767  
urban        0.5368160  0.7553619  0.7107  0.47729  
doctors      0.9794441  0.9662417  1.0137  0.31074  
nurses      -0.6215683  0.2906442 -2.1386  0.03247 *
dghe         0.3765267  0.6021621  0.6253  0.53178  
popdens     -0.0178004  0.0080409 -2.2137  0.02685 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_3 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region, data = datanew) 

cov_ols_3 <- vcovHC(reg_ols_3, type = "HC0")
se_ols_3 <- sqrt(diag(cov_ols_3))

coeftest(reg_ols_3, df = Inf, vcov = cov_ols_3)

z test of coefficients:

                               Estimate  Std. Error z value  Pr(>|z|)    
(Intercept)                   7.2935398  30.5413722  0.2388 0.8112541    
che                           0.6231485   6.1960348  0.1006 0.9198901    
pop65                         4.2552531   2.9769151  1.4294 0.1528844    
urban                         0.0192724   0.6831571  0.0282 0.9774940    
doctors                       0.1674631   0.8513386  0.1967 0.8440579    
nurses                       -0.4154245   0.2662049 -1.5605 0.1186313    
dghe                          0.1963998   0.5267080  0.3729 0.7092364    
popdens                      -0.0035068   0.0056073 -0.6254 0.5317112    
regionAmericas              138.9955990  41.0764961  3.3838 0.0007148 ***
regionEastern Mediterranean  37.8998494  32.4633225  1.1675 0.2430219    
regionEurope                 77.3828482  37.8600206  2.0439 0.0409615 *  
regionSouth-East Asia        -2.2814351  25.9090979 -0.0881 0.9298327    
regionWestern Pacific       -36.4469465  35.1404621 -1.0372 0.2996525    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_4 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region + beh_stayhome + beh_socgathering + beh_distance + beh_tellsymp + beh_handwash + fob_curfew, data = datanew) 

cov_ols_4 <- vcovHC(reg_ols_4, type = "HC0")
se_ols_4 <- sqrt(diag(cov_ols_4))

coeftest(reg_ols_4, df = Inf, vcov = cov_ols_4)

z test of coefficients:

                               Estimate  Std. Error z value Pr(>|z|)   
(Intercept)                 315.6203964 194.6502705  1.6215  0.10492   
che                          -4.4513361   7.2794672 -0.6115  0.54087   
pop65                         6.0516146   3.3179411  1.8239  0.06817 . 
urban                         0.4242408   0.7379454  0.5749  0.56536   
doctors                       0.3825550   0.8009964  0.4776  0.63294   
nurses                       -0.1422158   0.2687819 -0.5291  0.59673   
dghe                         -0.4739245   0.6012689 -0.7882  0.43058   
popdens                      -0.0080841   0.0066393 -1.2176  0.22337   
regionAmericas              108.9997804  39.8089597  2.7381  0.00618 **
regionEastern Mediterranean -16.6879280  40.3347231 -0.4137  0.67907   
regionEurope                 42.1476912  43.6497995  0.9656  0.33425   
regionSouth-East Asia       -35.1785413  37.1911593 -0.9459  0.34421   
regionWestern Pacific       -57.3234413  38.3044024 -1.4965  0.13452   
beh_stayhome                  3.3388241   1.7920384  1.8631  0.06244 . 
beh_socgathering             -4.8276446   2.1844828 -2.2100  0.02711 * 
beh_distance                  1.5145785   1.0802867  1.4020  0.16091   
beh_tellsymp                  1.2667673   1.6091127  0.7872  0.43114   
beh_handwash                 -4.2854623   2.3890173 -1.7938  0.07284 . 
fob_curfew                   70.4266393  73.7354035  0.9551  0.33951   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_5 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region + beh_stayhome + beh_socgathering + beh_distance + beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che*incomelvl, data = datanew)

cov_ols_5 <- vcovHC(reg_ols_5, type = "HC0")
se_ols_5 <- sqrt(diag(cov_ols_5))

coeftest(reg_ols_5, df = Inf, vcov = cov_ols_5)

z test of coefficients:

                                Estimate   Std. Error z value Pr(>|z|)  
(Intercept)                  438.0531775  213.6556502  2.0503  0.04034 *
che                          -19.4257130    9.6939004 -2.0039  0.04508 *
pop65                          8.2430275    3.3517601  2.4593  0.01392 *
urban                         -0.3455764    0.7613494 -0.4539  0.64990  
doctors                        0.5677534    0.6779376  0.8375  0.40233  
nurses                         0.2828706    0.2333618  1.2122  0.22545  
dghe                          -0.3514454    0.5467960 -0.6427  0.52040  
popdens                       -0.0022406    0.0049432 -0.4533  0.65035  
regionAmericas                69.7980221   31.8857323  2.1890  0.02860 *
regionEastern Mediterranean  -30.2692253   26.3589037 -1.1483  0.25082  
regionEurope                 -11.9103576   42.0379380 -0.2833  0.77693  
regionSouth-East Asia        -58.4882922   39.3263065 -1.4873  0.13695  
regionWestern Pacific        -87.7247057   35.6810732 -2.4586  0.01395 *
beh_stayhome                   3.7691104    1.8577889  2.0288  0.04248 *
beh_socgathering              -4.3915761    2.2316736 -1.9678  0.04909 *
beh_distance                   1.7291293    1.2440892  1.3899  0.16457  
beh_tellsymp                   0.0574909    1.5489442  0.0371  0.97039  
beh_handwash                  -4.4255514    2.4576044 -1.8008  0.07174 .
fob_curfew                    74.1821868   58.3881539  1.2705  0.20391  
incomelvlLMC                -141.4104774   58.6057055 -2.4129  0.01583 *
incomelvlUMC                 -73.6479444   69.0907199 -1.0660  0.28644  
incomelvlHIC                 -40.0951582   77.3458167 -0.5184  0.60419  
che:incomelvlLMC              25.8380200   12.2794892  2.1042  0.03536 *
che:incomelvlUMC              23.6900038   11.7720126  2.0124  0.04418 *
che:incomelvlHIC               8.9878346    9.9291733  0.9052  0.36536  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(reg_ols_1, reg_ols_2, reg_ols_3, reg_ols_4, reg_ols_5,
          se = list(se_ols_1, se_ols_2, se_ols_3, se_ols_4, se_ols_5), 
          title = "Regression results (n=96)",
          omit = "Constant",
          keep.stat = "n",
          notes = "Robust standard errors in parentheses",
          type = 'latex')
Regression results
Dependent variable:
mortality
(1) (2) (3) (4) (5)
che 12.060*** 6.468 0.623 -4.451 -19.426**
(2.992) (4.319) (6.196) (7.279) (9.694)
pop65 2.919 4.255 6.052* 8.243**
(2.525) (2.977) (3.318) (3.352)
urban 0.537 0.019 0.424 -0.346
(0.755) (0.683) (0.738) (0.761)
doctors 0.979 0.167 0.383 0.568
(0.966) (0.851) (0.801) (0.678)
nurses -0.622** -0.415 -0.142 0.283
(0.291) (0.266) (0.269) (0.233)
dghe 0.377 0.196 -0.474 -0.351
(0.602) (0.527) (0.601) (0.547)
popdens -0.018** -0.004 -0.008 -0.002
(0.008) (0.006) (0.007) (0.005)
regionAmericas 138.996*** 109.000*** 69.798**
(41.076) (39.809) (31.886)
regionEastern Mediterranean 37.900 -16.688 -30.269
(32.463) (40.335) (26.359)
regionEurope 77.383** 42.148 -11.910
(37.860) (43.650) (42.038)
regionSouth-East Asia -2.281 -35.179 -58.488
(25.909) (37.191) (39.326)
regionWestern Pacific -36.447 -57.323 -87.725**
(35.140) (38.304) (35.681)
beh_stayhome 3.339* 3.769**
(1.792) (1.858)
beh_socgathering -4.828** -4.392**
(2.184) (2.232)
beh_distance 1.515 1.729
(1.080) (1.244)
beh_tellsymp 1.267 0.057
(1.609) (1.549)
beh_handwash -4.285* -4.426*
(2.389) (2.458)
fob_curfew 70.427 74.182
(73.735) (58.388)
incomelvlLMC -141.410**
(58.606)
incomelvlUMC -73.648
(69.091)
incomelvlHIC -40.095
(77.346)
che:incomelvlLMC 25.838**
(12.279)
che:incomelvlUMC 23.690**
(11.772)
che:incomelvlHIC 8.988
(9.929)
Observations 96 96 96 96 96
Note: *p<0.1; **p<0.05; ***p<0.01
Robust standard errors in parentheses
# Regressions (1)-(3) on a larger sample
many <- read_excel("che_covid19-new.xlsx")[,2:37] %>%
relocate(mortality, .before = `Country Code`)
many
reg_ols_11 <- lm(mortality ~ che, data = many) 

cov_ols_11 <- vcovHC(reg_ols_11, type = "HC0")
se_ols_11 <- sqrt(diag(cov_ols_11))

coeftest(reg_ols_11, df = Inf, vcov = cov_ols_11)

z test of coefficients:

            Estimate Std. Error z value   Pr(>|z|)    
(Intercept)  19.5433    17.0158  1.1485     0.2507    
che          10.4748     2.6424  3.9642 0.00007365 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_22 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens, data = many) 

cov_ols_22 <- vcovHC(reg_ols_22, type = "HC0")
se_ols_22 <- sqrt(diag(cov_ols_22))

coeftest(reg_ols_22, df = Inf, vcov = cov_ols_22)

z test of coefficients:

               Estimate  Std. Error z value  Pr(>|z|)    
(Intercept) -33.9768080  22.3549755 -1.5199 0.1285420    
che           2.5378506   2.2068385  1.1500 0.2501464    
pop65         5.6772044   1.9893389  2.8538 0.0043198 ** 
urban         0.7709671   0.4248587  1.8146 0.0695787 .  
doctors       0.7266031   0.7512339  0.9672 0.3334376    
nurses       -0.4870531   0.2520901 -1.9321 0.0533522 .  
dghe          0.3209735   0.3040956  1.0555 0.2911958    
popdens      -0.0185701   0.0049825 -3.7270 0.0001937 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
reg_ols_33 <- lm(mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + region, data = many) 

cov_ols_33 <- vcovHC(reg_ols_33, type = "HC0")
se_ols_33 <- sqrt(diag(cov_ols_33))

coeftest(reg_ols_33, df = Inf, vcov = cov_ols_33)

z test of coefficients:

                               Estimate  Std. Error z value  Pr(>|z|)    
(Intercept)                 -28.9740418  20.3121227 -1.4264 0.1537411    
che                           0.9854109   2.0853514  0.4725 0.6365418    
pop65                         4.3977674   1.7977340  2.4463 0.0144337 *  
urban                         0.4661615   0.4049519  1.1512 0.2496694    
doctors                      -0.2366145   0.7367834 -0.3211 0.7481003    
nurses                       -0.3959232   0.2389396 -1.6570 0.0975192 .  
dghe                          0.4638272   0.3064896  1.5134 0.1301898    
popdens                      -0.0083562   0.0048076 -1.7381 0.0821876 .  
regionAmericas               97.1379134  26.9160554  3.6089 0.0003075 ***
regionEastern Mediterranean  30.7295222  15.4549796  1.9883 0.0467758 *  
regionEurope                 77.6972301  26.3780231  2.9455 0.0032240 ** 
regionSouth-East Asia         7.4116247  15.1387496  0.4896 0.6244313    
regionWestern Pacific       -26.5669946  14.5254225 -1.8290 0.0673996 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(reg_ols_11, reg_ols_22, reg_ols_33,
          se = list(se_ols_11, se_ols_22, se_ols_33), 
          title = "Regression results (n=160)",
          omit = "Constant",
          keep.stat = "n",
          notes = "Robust standard errors in parentheses",
          type = 'latex')
Regression results (n=160)
Dependent variable:
mortality
(1) (2) (3)
che 10.475*** 2.538 0.985
(2.642) (2.207) (2.085)
pop65 5.677*** 4.398**
(1.989) (1.798)
urban 0.771* 0.466
(0.425) (0.405)
doctors 0.727 -0.237
(0.751) (0.737)
nurses -0.487* -0.396*
(0.252) (0.239)
dghe 0.321 0.464
(0.304) (0.306)
popdens -0.019*** -0.008*
(0.005) (0.005)
regionAmericas 97.138***
(26.916)
regionEastern Mediterranean 30.730**
(15.455)
regionEurope 77.697***
(26.378)
regionSouth-East Asia 7.412
(15.139)
regionWestern Pacific -26.567*
(14.525)
Observations 160 160 160
Note: *p<0.1; **p<0.05; ***p<0.01
Robust standard errors in parentheses

Hypothesis tests

car::linearHypothesis(reg_ols_5, "che + che:incomelvlLMC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test

Hypothesis:
che  + che:incomelvlLMC = 0

Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + 
    region + beh_stayhome + beh_socgathering + beh_distance + 
    beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che * 
    incomelvl

Note: Coefficient covariance matrix supplied.

  Res.Df Df Chisq Pr(>Chisq)
1     72                    
2     71  1 0.254     0.6143
car::linearHypothesis(reg_ols_5, "che + che:incomelvlUMC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test

Hypothesis:
che  + che:incomelvlUMC = 0

Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + 
    region + beh_stayhome + beh_socgathering + beh_distance + 
    beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che * 
    incomelvl

Note: Coefficient covariance matrix supplied.

  Res.Df Df  Chisq Pr(>Chisq)
1     72                     
2     71  1 0.1122     0.7376
car::linearHypothesis(reg_ols_5, "che + che:incomelvlHIC = 0", test="Chisq", white.adjust="hc0")
Linear hypothesis test

Hypothesis:
che  + che:incomelvlHIC = 0

Model 1: restricted model
Model 2: mortality ~ che + pop65 + urban + doctors + nurses + dghe + popdens + 
    region + beh_stayhome + beh_socgathering + beh_distance + 
    beh_tellsymp + beh_handwash + fob_curfew + incomelvl + che * 
    incomelvl

Note: Coefficient covariance matrix supplied.

  Res.Df Df  Chisq Pr(>Chisq)
1     72                     
2     71  1 2.6075     0.1064